Global Code and Functions¶
Run this first to import modules and global functions
#Modules
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
from IPython.display import Image, display
import re
import math
import torch
import os
import cv2
from collections import Counter
import pandas as pd
# Models
from scipy.spatial import distance
from sklearn.neighbors import NearestNeighbors
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_score
from transformers import CLIPProcessor, CLIPModel
from sklearn.metrics.pairwise import cosine_similarity
# Global Functions
def get_top50_ann(target_embedding, embeddings, k=50):
    """Return the indices of the nearest neighbours of a target by cosine distance.

    Args:
        target_embedding: 1-D embedding vector to search around.
        embeddings: 2-D array-like of candidate embeddings (one row each).
        k: Number of neighbours to return (default 50).

    Returns:
        Array of row indices into ``embeddings``, nearest first, with the
        closest hit dropped.
    """
    # One extra neighbour is requested because the closest hit is expected to
    # be the target itself (assumes the target is a row of `embeddings`).
    # Capping at the dataset size keeps small datasets from raising.
    n_neighbors = min(k + 1, len(embeddings))
    nn = NearestNeighbors(n_neighbors=n_neighbors, metric='cosine', algorithm='brute')
    nn.fit(embeddings)
    _, indices = nn.kneighbors([target_embedding])
    return indices[0][1:]  # Skip the first index because it's the target itself
def get_top50_euclidean(target_embedding, embeddings, k=50):
    """Return the indices of the nearest neighbours of a target by Euclidean distance.

    Args:
        target_embedding: 1-D embedding vector to search around.
        embeddings: 2-D array-like of candidate embeddings (one row each).
        k: Number of neighbours to return (default 50).

    Returns:
        Array of row indices into ``embeddings``, nearest first, with the
        closest hit dropped.
    """
    points = np.asarray(embeddings, dtype=float)
    if points.size == 0:
        return np.array([], dtype=int)
    target = np.asarray(target_embedding, dtype=float)
    # One vectorised norm over all rows instead of a Python-level loop.
    distances = np.linalg.norm(points - target, axis=1)
    # Stable sort keeps tie order deterministic; position 0 is skipped because
    # the closest hit is expected to be the target itself.
    return np.argsort(distances, kind="stable")[1:k + 1]
def display_image(index):
    """Show the thumbnail belonging to entry ``index`` of ``g_movie_embeddings``."""
    thumbnail_name = g_movie_embeddings[index]['input']
    thumbnail_path = f"thumbnails_folder2large/{thumbnail_name}"
    display(Image(filename=thumbnail_path))
def display_images(indices, embeddings=None):
    """Show up to 100 thumbnails in a 10x10 grid.

    Args:
        indices: Sequence of positions into the global ``g_movie_embeddings``.
            Only the first 100 are drawn; unused grid cells stay blank.
        embeddings: Unused. Kept (now optional) so existing two-argument
            calls keep working.
    """
    fig, axes = plt.subplots(10, 10, figsize=(20, 10))
    grid = list(axes.flat)
    for ax in grid:
        ax.axis('off')
    # zip stops at the shorter sequence, so fewer than 100 indices no longer
    # raises IndexError.
    for ax, idx in zip(grid, indices):
        ax.imshow(plt.imread(f"thumbnails_folder2large/{g_movie_embeddings[idx]['input']}"))
    plt.show()
def display_images_first_x_last_x(indices, first_x, last_x, cluster_n=0):
    """Show the first ``first_x`` and last ``last_x`` thumbnails of a cluster.

    Args:
        indices: Sequence of positions into the global ``g_movie_embeddings``.
        first_x: How many images to take from the start of ``indices``.
        last_x: How many images to take from the end of ``indices``.
        cluster_n: Cluster number, used only in the figure title.
    """
    # A list copy makes `+` concatenate even when an array is passed in.
    indices = list(indices)
    head = indices[:first_x]
    # The tail is taken from what is left after the head so small clusters do
    # not show the same image twice. `[-0:]` would return the whole list, so
    # last_x == 0 is handled explicitly.
    remaining = indices[len(head):]
    tail = remaining[-last_x:] if last_x > 0 else []
    selected_indices = head + tail

    # Size the grid from what will actually be drawn (at least one row).
    cols = 10
    rows = max(1, math.ceil(len(selected_indices) / cols))
    fig, axes = plt.subplots(rows, cols, figsize=(20, 2 * rows))
    axes = axes.ravel()  # Flatten the axes array

    # Hide all axes, then re-enable only the ones that receive an image.
    for ax in axes:
        ax.axis('off')
    for ax, idx in zip(axes, selected_indices):
        ax.imshow(plt.imread(f"thumbnails_folder2large/{g_movie_embeddings[idx]['input']}"))
        ax.axis('on')

    # A figure-level title; plt.title would only label the last subplot.
    fig.suptitle(f"Cluster {cluster_n} - First {first_x} and Last {last_x} Images - Total Images in Cluster: {len(indices)}")
    plt.tight_layout()
    plt.show()
def display_cluster_images(cluster_labels, cluster_number):
    """Show thumbnails of the frames assigned to one cluster.

    Args:
        cluster_labels: Sequence of cluster labels, one per frame.
        cluster_number: The label whose members should be displayed.
    """
    # Get indices of images in the cluster
    indices = [i for i, label in enumerate(cluster_labels) if label == cluster_number]
    # display_images takes (indices, embeddings); the second argument was
    # missing here, which raised TypeError.
    display_images(indices, g_only_embeddings)
def display_cluster_images_first_last_x(cluster_labels, cluster_number, first_x, last_x):
    """Show the first and last few thumbnails of one cluster.

    Collects the positions whose label equals ``cluster_number`` and passes
    them to ``display_images_first_x_last_x``.
    """
    members = []
    for position, label in enumerate(cluster_labels):
        if label == cluster_number:
            members.append(position)
    display_images_first_x_last_x(members, first_x, last_x, cluster_number)
def find_and_remove_intro_and_subtitles(g_only_embeddings, threshold=0.7):
    """Drop frames whose embedding matches intro or closing-credit text prompts.

    Each frame embedding is compared with a fixed set of CLIP text prompts.
    A frame is removed when its best cosine similarity exceeds ``threshold``.

    Args:
        g_only_embeddings: 2-D array of frame embeddings, one row per entry
            of the global ``g_movie_embeddings`` (read here, not passed in).
        threshold: Similarity above which a frame is removed.

    Returns:
        Tuple ``(new_g_movie_embeddings, new_g_only_embeddings)`` containing
        only the frames that were kept.
    """
    # Load the CLIP model and processor
    model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
    processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
    # Get the text embeddings for the intro and subtitles
    prompts = [
        "Image of Walt Disney Movie Intro",
        "Walt Disney Movie Intro",
        "Movie closing credits",
        "Movie end credits",
        "Image with lots of closing credits",
    ]
    inputs = processor(text=prompts, return_tensors="pt", padding=True)
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        text_embeddings = model.get_text_features(**inputs)
    text_embeddings_np = text_embeddings.detach().numpy()
    # Calculate the cosine similarity between the text embeddings and the movie embeddings
    similarities = cosine_similarity(text_embeddings_np, g_only_embeddings)
    # A boolean mask replaces the per-element `i not in ndarray` test, which
    # was a linear scan for every frame.
    keep_mask = ~(similarities.max(axis=0) > threshold)
    new_g_movie_embeddings = [emb for emb, keep in zip(g_movie_embeddings, keep_mask) if keep]
    # Mask indexing keeps the result 2-D even when every frame is removed.
    new_g_only_embeddings = np.asarray(g_only_embeddings)[keep_mask]
    return new_g_movie_embeddings, new_g_only_embeddings
# Global Variables
# Load the per-frame embedding records; the context manager closes the file
# handle that the bare json.load(open(...)) left open.
with open("honey_i_shrunk_the_kids_movie_embeddings_1_second.json", encoding="utf-8") as embeddings_file:
    g_movie_embeddings = json.load(embeddings_file)
# Matrix of embedding vectors, one row per record.
g_only_embeddings = np.array([emb['embedding'] for emb in g_movie_embeddings])
# Both globals are replaced by their filtered versions (intro/credit frames removed).
g_movie_embeddings, g_only_embeddings = find_and_remove_intro_and_subtitles(g_only_embeddings, threshold=0.237)
Hypothesis 1¶
# Positions of the three reference frames in `movie_embeddings`
# (the unfiltered list loaded below).
dog_idx, mower_idx, ant_idx = 5038, 4733, 3230

# CLIP processor and model used by the text-query helper.
processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")

# Full embedding list, loaded directly from the JSON file.
with open("honey_i_shrunk_the_kids_movie_embeddings_1_second.json", 'r') as file:
    movie_embeddings = json.load(file)
def euclidean_distance(array1, array2):
    """Return the Euclidean (L2) distance between two array-likes."""
    difference = np.array(array1) - np.array(array2)
    return np.linalg.norm(difference)
def find_and_display_matches(text_queries, top_k=5):
    """Print and display the frames that best match each text query.

    Each query is embedded with the module-level CLIP ``model`` and
    ``processor`` and compared by cosine similarity with every entry of
    ``movie_embeddings``. The ``top_k`` best frames are shown per query.
    """
    tokenized = processor(text=text_queries, return_tensors="pt", padding=True)
    query_vectors = model.get_text_features(**tokenized).detach().numpy()
    frame_vectors = np.array([movie['embedding'] for movie in movie_embeddings])
    scores = cosine_similarity(query_vectors, frame_vectors)

    for row, query in enumerate(text_queries):
        print(f"Top matches for: {query}")
        # argsort is ascending, so reverse it to put the best match first.
        best_first = np.argsort(scores[row])[::-1]
        for frame_idx in best_first[:top_k]:
            frame = movie_embeddings[frame_idx]['input']
            print(f"Displaying frame: {frame}")
            display(Image(filename=f'thumbnails_folder2large/{frame}'))
def plot_euclidean_distance_from(target_idx):
    """Display a target frame and plot every frame's distance from it.

    Args:
        target_idx: Position of the target frame in ``movie_embeddings``.
    """
    target = movie_embeddings[target_idx]
    image_path = 'thumbnails_folder2large/' + target["input"]
    # Display the image
    display(Image(filename=image_path))

    # Distance of every frame (in list order) to the target frame.
    index_to_distance = [
        euclidean_distance(emb["embedding"], target["embedding"])
        for emb in movie_embeddings
    ]

    # Create a plot using Seaborn
    sns.set(style="whitegrid")  # Set the style
    plt.figure(figsize=(10, 6))  # Set the figure size
    sns.lineplot(x=range(len(index_to_distance)), y=index_to_distance)  # Plot the array with index as x-axis
    plt.xlabel("Index")  # Set the x-axis label
    plt.ylabel("Distance")  # Set the y-axis label
    plt.title("Distance from Target Over Film")  # Set the title
    plt.show()  # Show the plot
def display_surrounding_frames(target_idx, frame_range=5):
    """Print and display the frames around a target frame.

    Args:
        target_idx: Position of the centre frame in ``movie_embeddings``.
        frame_range: Number of frames to show on each side of the target.
    """
    # Clamp at 0: a negative start would wrap around to the end of the list
    # and the slice would come back empty or wrong.
    idx_begin = max(0, target_idx - frame_range)
    idx_end = target_idx + frame_range + 1
    display_frames = movie_embeddings[idx_begin:idx_end]
    for emb in display_frames:
        print(f'frame {emb["input"]}')
        image_path = 'thumbnails_folder2large/' + emb["input"]
        display(Image(filename=image_path))
Exploring the Dog¶
Display the images
display_surrounding_frames(dog_idx)
frame thumbnail_5034.jpg
frame thumbnail_5035.jpg
frame thumbnail_5036.jpg
frame thumbnail_5037.jpg
frame thumbnail_5038.jpg
frame thumbnail_5039.jpg
frame thumbnail_5040.jpg
frame thumbnail_5041.jpg
frame thumbnail_5042.jpg
frame thumbnail_5043.jpg
frame thumbnail_5044.jpg
plot_euclidean_distance_from(dog_idx)
# Using the following to explore 500, 2100 and 3400
#display_surrounding_frames(3400, frame_range=40)
# Discovered the dog in
# thumbnail_0514.jpg
# thumbnail_2078.jpg
# thumbnail_2131.jpg
# thumbnail_3427.jpg
# Indexes are 0-based
display_surrounding_frames(513, frame_range=2)
frame thumbnail_0512.jpg
frame thumbnail_0513.jpg
frame thumbnail_0514.jpg
frame thumbnail_0515.jpg
frame thumbnail_0516.jpg
The dog is watching the father's scientific experiment.
display_surrounding_frames(2077, frame_range=2)
frame thumbnail_2076.jpg
frame thumbnail_2077.jpg
frame thumbnail_2078.jpg
frame thumbnail_2079.jpg
frame thumbnail_2080.jpg
The dog seems to notice something outside.
display_surrounding_frames(2130, frame_range=2)
frame thumbnail_2129.jpg
frame thumbnail_2130.jpg
frame thumbnail_2131.jpg
frame thumbnail_2132.jpg
frame thumbnail_2133.jpg
The parents are distracted, and the dog wants to investigate what is going on outside, once again exhibiting a higher sense of awareness than the humans.
display_surrounding_frames(3426, frame_range=2)
frame thumbnail_3425.jpg
frame thumbnail_3426.jpg
frame thumbnail_3427.jpg
frame thumbnail_3428.jpg
frame thumbnail_3429.jpg
Here the dog is spinning the father around and disrupting his search for the children, which is an example of a comedic scene.
Exploring the Lawnmower¶
Display the images
display_surrounding_frames(mower_idx)
frame thumbnail_4729.jpg
frame thumbnail_4730.jpg
frame thumbnail_4731.jpg
frame thumbnail_4732.jpg
frame thumbnail_4733.jpg
frame thumbnail_4734.jpg
frame thumbnail_4735.jpg
frame thumbnail_4736.jpg
frame thumbnail_4737.jpg
frame thumbnail_4738.jpg
frame thumbnail_4739.jpg
plot_euclidean_distance_from(mower_idx)
# Display around 790
#display_surrounding_frames(790, frame_range=40)
display_surrounding_frames(790, frame_range=5)
frame thumbnail_0786.jpg
frame thumbnail_0787.jpg
frame thumbnail_0788.jpg
frame thumbnail_0789.jpg
frame thumbnail_0790.jpg
frame thumbnail_0791.jpg
frame thumbnail_0792.jpg
frame thumbnail_0793.jpg
frame thumbnail_0794.jpg
frame thumbnail_0795.jpg
frame thumbnail_0796.jpg
Exploring the Ant¶
Display the images
display_surrounding_frames(ant_idx)
frame thumbnail_3226.jpg
frame thumbnail_3227.jpg
frame thumbnail_3228.jpg
frame thumbnail_3229.jpg
frame thumbnail_3230.jpg
frame thumbnail_3231.jpg
frame thumbnail_3232.jpg
frame thumbnail_3233.jpg
frame thumbnail_3234.jpg
frame thumbnail_3235.jpg
frame thumbnail_3236.jpg
plot_euclidean_distance_from(ant_idx)
# Display around 2180
#display_surrounding_frames(2180, frame_range=40)
display_surrounding_frames(2180, frame_range=1)
frame thumbnail_2180.jpg
frame thumbnail_2181.jpg
frame thumbnail_2182.jpg
# Display around 4400
#display_surrounding_frames(4400, frame_range=40)
display_surrounding_frames(4435, frame_range=1)
frame thumbnail_4435.jpg
frame thumbnail_4436.jpg
frame thumbnail_4437.jpg
The scene around 4435 shows an ant fighting a scorpion and appearing to save the kids.
Use CLIP to ask about 'a photo of an ant fighting a scorpion' to see what it returns.
exploration_query = ['a photo of an ant fighting a scorpion']
find_and_display_matches(exploration_query, top_k=5)
Top matches for: a photo of an ant fighting a scorpion Displaying frame: thumbnail_4433.jpg
Displaying frame: thumbnail_3469.jpg
Displaying frame: thumbnail_4132.jpg
Displaying frame: thumbnail_4133.jpg
Displaying frame: thumbnail_3292.jpg
Cluster Analysis¶
We will compare clustering with t-SNE (t-Distributed Stochastic Neighbor Embedding) and PCA (Principal Component Analysis) dimensionality reduction algorithms.
t-SNE¶
# Using t-SNE to embed the vectors into 2D
# random_state is pinned so repeated runs start from the same seed.
tsne = TSNE(n_components=2, random_state=42)
# Fit on the filtered frame embeddings; the result is used by the clustering
# and plotting cells below.
tSNE_embedded_vectors = tsne.fit_transform(g_only_embeddings)
PCA¶
# Using PCA to embed the vectors into 2D
# random_state is pinned, as it is for t-SNE and KMeans in this notebook, so
# the projection is reproducible if scikit-learn selects its randomized solver.
pca = PCA(n_components=2, random_state=42)
PCA_embedded_vectors = pca.fit_transform(g_only_embeddings)
Cluster t-SNE and PCA with K-Means and display Silhouette Score¶
Silhouette Score: Measures how similar an object is to its own cluster compared to other clusters. The score ranges from -1 to 1, where a high value indicates that the object is well matched to its own cluster and poorly matched to neighboring clusters.
Let's find the best number of clusters for t-SNE and PCA...
def _plot_silhouette_scores(X, title, num_clusters=range(2, 20)):
    """Run KMeans for each k, plot the silhouette scores and return them.

    Args:
        X: 2-D array of points to cluster.
        title: Plot title.
        num_clusters: Iterable of cluster counts to try.

    Returns:
        List of silhouette scores, one per value in ``num_clusters``.
    """
    sil_scores = []
    for k in num_clusters:
        # Cluster, then score how well the points fit their assigned clusters.
        labels = KMeans(n_init="auto", n_clusters=k, random_state=42).fit(X).labels_
        sil_scores.append(silhouette_score(X, labels))
    # Plot silhouette scores
    plt.plot(num_clusters, sil_scores, 'bx-')
    plt.title(title)
    plt.xlabel('k (number of clusters)')
    plt.ylabel('Silhouette Score')
    plt.show()
    return sil_scores


# Range of clusters to try
num_clusters = range(2, 20)

X = tSNE_embedded_vectors
sil_scores = _plot_silhouette_scores(X, 't-SNE', num_clusters)

X = PCA_embedded_vectors
sil_scores = _plot_silhouette_scores(X, 'PCA', num_clusters)
Let's cluster t-SNE and PCA using the k values with the best Silhouette Scores.
# Performing KMeans clustering with the best-k silhouette score.
# The k values are named once so the plot titles cannot drift from the models
# (the t-SNE title previously said k=16 while the model used k=19).
tsne_k = 19
pca_k = 4
kmeans = KMeans(n_init="auto", n_clusters=tsne_k, random_state=42)
tSNE_clusters = kmeans.fit_predict(tSNE_embedded_vectors)
kmeans = KMeans(n_init="auto", n_clusters=pca_k, random_state=42)
PCA_clusters = kmeans.fit_predict(PCA_embedded_vectors)

# Extracting numbers from file names for labels
labels = [re.search(r'\d+', vector['input']).group() for vector in g_movie_embeddings]

# One scatter plot per projection (t-SNE, then PCA), coloured by cluster.
for method_name, embedded_vectors, clusters, k in (
    ('t-SNE', tSNE_embedded_vectors, tSNE_clusters, tsne_k),
    ('PCA', PCA_embedded_vectors, PCA_clusters, pca_k),
):
    # Plotting the embedded vectors with cluster coloring
    sns.set_theme()
    plt.figure(figsize=(12, 8))  # Adjust the figure size as needed
    sns.scatterplot(x=embedded_vectors[:, 0], y=embedded_vectors[:, 1], hue=clusters, palette='bright', legend='full', s=100)
    for i, vec in enumerate(embedded_vectors):
        plt.text(vec[0] + 0.02, vec[1] + 0.02, labels[i], fontsize=6)  # Adding labels
    plt.xlabel('Dimension 1')
    plt.ylabel('Dimension 2')
    plt.title(f'{method_name} Embedded Vectors with KMeans Clustering (k={k})')
    plt.legend(title='Cluster')
    plt.show()
Let's sample images in t-SNE clusters¶
unique_clusters = set(tSNE_clusters)
# Set iteration order is not guaranteed; sort so clusters are shown in
# ascending order on every run.
for cluster in sorted(unique_clusters):
    display_cluster_images_first_last_x(tSNE_clusters, cluster, 10, 10)
Let's sample images in PCA clusters¶
unique_clusters = set(PCA_clusters)
# Set iteration order is not guaranteed; sort so clusters are shown in
# ascending order on every run.
for cluster in sorted(unique_clusters):
    display_cluster_images_first_last_x(PCA_clusters, cluster, 10, 10)
t-SNE and PCA with K-means clusters over a timeline¶
By looking at these plots, we can see if frames from the same cluster tend to occur close together in time, which might indicate that the clustering is capturing some meaningful structure in the movie. For example, all the frames from a particular scene might be grouped into the same cluster.
# Turn each numeric thumbnail label into a timestamp by dividing by the
# assumed frame rate of the movie.
# NOTE(review): the embeddings file name contains "1_second", which suggests
# thumbnails may be sampled once per second rather than per frame. Confirm
# that dividing by 24 is intended.
frame_rate = 24
timestamps = list(map(lambda label: int(label) / frame_rate, labels))

# One timeline scatter per projection: the t-SNE clusters first, then PCA.
for plot_title, cluster_ids in (
    ('t-SNE Clusters Over Time', tSNE_clusters),
    ('PCA Clusters Over Time', PCA_clusters),
):
    plt.figure(figsize=(12, 6))
    plt.scatter(timestamps, cluster_ids, c=cluster_ids, cmap='viridis')
    plt.xlabel('Time')
    plt.ylabel('Cluster')
    plt.title(plot_title)
    plt.colorbar(label='Cluster')
    plt.show()
import torch
import os
from PIL import Image
import matplotlib.pyplot as plt
import cv2
from collections import Counter
# NOTE(review): this rebinds the module-level `model`, which
# find_and_display_matches uses as the CLIP model. Re-running that helper
# after this cell will call YOLO instead.
model = torch.hub.load("ultralytics/yolov5", "yolov5s", pretrained=True)
image_directory = "thumbnails_folder2large/"

# List the first 100 images in the image file directory
# NOTE(review): os.listdir returns entries in arbitrary order, so "first 100"
# is an arbitrary sample rather than the first 100 frames. Confirm intent.
image_files = [
    f
    for f in os.listdir(image_directory)
    if os.path.isfile(os.path.join(image_directory, f))
]
image_files = image_files[:100]

# Initialize a Counter to keep track of object counts
total_counts = Counter()

for image_file in image_files:
    image_path = os.path.join(image_directory, image_file)
    results = model(image_path)
    print(f"Results for {image_file}:")
    df = results.pandas().xyxy[0]  # Results as DataFrame
    print(df)

    # Per-image count of each detected class name (computed once).
    counts = df["name"].value_counts()
    object_counts = counts

    # Counter.update treats a non-mapping iterable as elements to tally.
    # A Series iterates over its values, so the count numbers were being
    # tallied instead of the names. A dict adds name -> count as intended.
    total_counts.update(counts.to_dict())

    results.show()
    # exist_ok=True writes every annotated image into the same directory
    # instead of creating output2, output3, ... on each iteration.
    results.save(save_dir="output/", exist_ok=True)

    print("Object counts:", object_counts)
    print("\n")
Using cache found in /home/stace/.cache/torch/hub/ultralytics_yolov5_master YOLOv5 🚀 2024-4-27 Python-3.10.12 torch-2.3.0+cu121 CPU Fusing layers... YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs Adding AutoShape...
Results for thumbnail_0795.jpg:
xmin ymin xmax ymax confidence class \
0 165.206284 6.893837 215.419052 170.662994 0.905672 0
1 101.212090 35.575703 155.678467 141.318115 0.815038 0
2 100.350342 0.000000 129.249664 26.236727 0.532667 58
3 94.354683 39.541080 110.575081 63.907471 0.360822 58
4 126.063278 13.044321 137.669662 28.871672 0.265851 58
5 86.855400 0.000000 116.105843 25.523857 0.260697 58
name
0 person
1 person
2 potted plant
3 potted plant
4 potted plant
5 potted plant
Saved 1 image to output
Object counts: name potted plant 4 person 2 Name: count, dtype: int64 Results for thumbnail_2778.jpg: Empty DataFrame Columns: [xmin, ymin, xmax, ymax, confidence, class, name] Index: []
Saved 1 image to output2
Object counts: Series([], Name: count, dtype: int64)
Results for thumbnail_2599.jpg:
xmin ymin xmax ymax confidence class name
0 39.705357 3.163864 143.259903 170.139664 0.871375 0 person
1 148.172928 66.428223 243.559738 170.312714 0.830257 0 person
2 228.076645 61.408001 319.787598 99.793732 0.372595 25 umbrella
Saved 1 image to output3
Object counts: name
person 2
umbrella 1
Name: count, dtype: int64
Results for thumbnail_5102.jpg:
xmin ymin xmax ymax confidence class name
0 62.999626 0.411362 243.993866 169.815109 0.888487 0 person
1 201.136368 2.532188 319.873016 170.904419 0.368096 0 person
2 201.111191 59.719219 245.755753 106.706238 0.308551 0 person
Saved 1 image to output4
Object counts: name
person 3
Name: count, dtype: int64
Results for thumbnail_2869.jpg:
xmin ymin xmax ymax confidence class name
0 44.419312 2.505157 279.158417 167.806213 0.842688 0 person
Saved 1 image to output5
Object counts: name person 1 Name: count, dtype: int64 Results for thumbnail_4763.jpg: Empty DataFrame Columns: [xmin, ymin, xmax, ymax, confidence, class, name] Index: []
Saved 1 image to output6
Object counts: Series([], Name: count, dtype: int64) Results for thumbnail_1841.jpg: Empty DataFrame Columns: [xmin, ymin, xmax, ymax, confidence, class, name] Index: []
Saved 1 image to output7
Object counts: Series([], Name: count, dtype: int64)
Results for thumbnail_4344.jpg:
xmin ymin xmax ymax confidence class name
0 69.193626 2.837288 314.933716 171.205292 0.704568 0 person
1 2.009521 1.623985 135.926559 170.301331 0.579770 0 person
Saved 1 image to output8
Object counts: name person 2 Name: count, dtype: int64 Results for thumbnail_2830.jpg: xmin ymin xmax ymax confidence class name 0 0.0 2.524231 140.201889 168.760849 0.432279 0 person
Saved 1 image to output9
Object counts: name person 1 Name: count, dtype: int64 Results for thumbnail_0764.jpg: Empty DataFrame Columns: [xmin, ymin, xmax, ymax, confidence, class, name] Index: []
Saved 1 image to output10
Object counts: Series([], Name: count, dtype: int64)
Results for thumbnail_4769.jpg:
xmin ymin xmax ymax confidence class name
0 200.240234 44.231087 240.104523 86.270424 0.276669 21 bear
Saved 1 image to output11
Object counts: name
bear 1
Name: count, dtype: int64
Results for thumbnail_5230.jpg:
xmin ymin xmax ymax confidence class name
0 111.571434 53.508400 234.566559 150.327423 0.877162 0 person
1 193.450363 5.458885 289.558289 146.941132 0.750499 0 person
Saved 1 image to output12
Object counts: name
person 2
Name: count, dtype: int64
Results for thumbnail_4402.jpg:
xmin ymin xmax ymax confidence class name
0 58.711868 66.00354 317.49469 168.783325 0.405987 55 cake
Saved 1 image to output13
Object counts: name
cake 1
Name: count, dtype: int64
Results for thumbnail_0838.jpg:
xmin ymin xmax ymax confidence class \
0 10.887814 9.057289 121.294357 170.662140 0.918634 0
1 132.176971 72.398933 225.301208 171.744598 0.889513 0
2 214.722260 101.096077 230.285492 122.825005 0.479683 67
3 262.358063 95.709274 319.863617 172.149094 0.373231 1
4 134.295975 75.295395 146.226730 101.754227 0.364346 58
5 140.245438 44.690025 156.069687 70.838554 0.332648 58
6 127.589920 87.786789 137.921951 104.485153 0.320908 58
7 263.842987 95.603409 320.000000 171.102280 0.270932 58
8 126.522469 107.228600 148.275803 143.455444 0.250760 58
name
0 person
1 person
2 cell phone
3 bicycle
4 potted plant
5 potted plant
6 potted plant
7 potted plant
8 potted plant
Saved 1 image to output14
Object counts: name
potted plant 5
person 2
cell phone 1
bicycle 1
Name: count, dtype: int64
Results for thumbnail_3214.jpg:
xmin ymin xmax ymax confidence class name
0 144.634552 32.005585 317.515961 171.461853 0.928847 0 person
1 20.182396 51.222668 173.023438 170.957565 0.680408 0 person
Saved 1 image to output15
Object counts: name
person 2
Name: count, dtype: int64
Results for thumbnail_3277.jpg:
xmin ymin xmax ymax confidence class name
0 0.053280 49.604023 75.099426 169.629684 0.827735 0 person
1 73.538979 49.266884 160.660828 168.734863 0.820189 0 person
2 154.055847 78.233902 201.372101 170.313568 0.729479 0 person
Saved 1 image to output16
Object counts: name
person 3
Name: count, dtype: int64
Results for thumbnail_1786.jpg:
xmin ymin xmax ymax confidence class name
0 73.028404 55.309113 160.393433 170.868866 0.653283 0 person
Saved 1 image to output17
Object counts: name
person 1
Name: count, dtype: int64
Results for thumbnail_4734.jpg:
xmin ymin xmax ymax confidence class name
0 280.429657 53.827934 319.387726 97.772301 0.737459 56 chair
1 114.833496 40.400749 223.138916 133.413239 0.473742 7 truck
2 190.397873 49.629734 223.321640 63.169189 0.339173 2 car
Saved 1 image to output18
Object counts: name chair 1 truck 1 car 1 Name: count, dtype: int64 Results for thumbnail_1414.jpg: Empty DataFrame Columns: [xmin, ymin, xmax, ymax, confidence, class, name] Index: []
Saved 1 image to output19
Object counts: Series([], Name: count, dtype: int64)
Results for thumbnail_0402.jpg:
xmin ymin xmax ymax confidence class name
0 187.116348 98.901291 218.031570 143.041443 0.862557 0 person
1 214.157837 83.272797 240.787567 142.929199 0.589160 0 person
Saved 1 image to output20
Object counts: name person 2 Name: count, dtype: int64 Results for thumbnail_0604.jpg: xmin ymin xmax ymax confidence class name 0 0.0 0.0 267.941467 168.336533 0.653137 0 person
Saved 1 image to output21
Object counts: name
person 1
Name: count, dtype: int64
Results for thumbnail_1578.jpg:
xmin ymin xmax ymax confidence class name
0 179.073669 27.716438 230.254181 168.477631 0.803939 0 person
Saved 1 image to output22
Object counts: name
person 1
Name: count, dtype: int64
Results for thumbnail_3638.jpg:
xmin ymin xmax ymax confidence class name
0 0.524506 2.683617 190.530685 170.092987 0.890239 0 person
1 177.725220 3.032242 318.074402 170.370590 0.403448 15 cat
Saved 1 image to output23
Object counts: name
person 1
cat 1
Name: count, dtype: int64
Results for thumbnail_2130.jpg:
xmin ymin xmax ymax confidence class name
0 66.485329 15.057289 202.649689 163.535858 0.825101 15 cat
Saved 1 image to output24
Object counts: name
cat 1
Name: count, dtype: int64
Results for thumbnail_0904.jpg:
xmin ymin xmax ymax confidence class name
0 143.296783 2.758759 296.191437 169.471558 0.869524 0 person
Saved 1 image to output25
Object counts: name
person 1
Name: count, dtype: int64
Results for thumbnail_1196.jpg:
xmin ymin xmax ymax confidence class name
0 40.405964 90.664719 84.403656 171.545609 0.858486 0 person
1 183.029037 3.579781 319.300720 171.077515 0.574772 0 person
2 138.948303 39.238800 263.657074 170.145706 0.465853 0 person
Saved 1 image to output26
Object counts: name
person 3
Name: count, dtype: int64
Results for thumbnail_4256.jpg:
xmin ymin xmax ymax confidence class name
0 146.819992 9.440872 320.000000 172.989639 0.945141 0 person
1 20.983765 13.298370 161.444916 170.617905 0.918598 0 person
2 86.343025 113.282387 101.466789 170.770935 0.297957 27 tie
Saved 1 image to output27
Object counts: name
person 2
tie 1
Name: count, dtype: int64
Results for thumbnail_4935.jpg:
xmin ymin xmax ymax confidence class name
0 88.580727 26.743401 248.351013 170.39566 0.623986 0 person
Saved 1 image to output28
Object counts: name person 1 Name: count, dtype: int64 Results for thumbnail_2105.jpg: Empty DataFrame Columns: [xmin, ymin, xmax, ymax, confidence, class, name] Index: []
Saved 1 image to output29
Object counts: Series([], Name: count, dtype: int64)
Results for thumbnail_0214.jpg:
xmin ymin xmax ymax confidence class \
0 176.159821 8.380219 287.971924 141.245422 0.850449 0
1 294.993713 102.353477 319.953003 145.961212 0.844055 41
2 11.467091 104.170700 123.098709 171.487625 0.587605 15
3 1.286644 72.242310 112.192795 144.937378 0.459279 58
4 258.418427 70.611099 308.974274 114.212433 0.433455 56
5 130.409546 128.525208 142.579285 144.590881 0.351891 41
6 96.898674 80.701042 166.870911 137.720917 0.331840 13
7 266.991577 120.622444 297.645996 145.624557 0.289562 41
8 166.048965 52.058750 179.461533 83.111313 0.265729 39
name
0 person
1 cup
2 cat
3 potted plant
4 chair
5 cup
6 bench
7 cup
8 bottle
Saved 1 image to output30
Object counts: name
cup 3
person 1
cat 1
potted plant 1
chair 1
bench 1
bottle 1
Name: count, dtype: int64
Results for thumbnail_1313.jpg:
xmin ymin xmax ymax confidence class name
0 130.926376 2.710327 318.124573 170.938354 0.923482 0 person
1 205.433197 129.331558 235.381714 173.000000 0.863757 27 tie
Saved 1 image to output31
Object counts: name
person 1
tie 1
Name: count, dtype: int64
Results for thumbnail_4161.jpg:
xmin ymin xmax ymax confidence class name
0 95.329269 1.505486 309.440125 170.497589 0.743999 0 person
Saved 1 image to output32
Object counts: name
person 1
Name: count, dtype: int64
Results for thumbnail_1218.jpg:
xmin ymin xmax ymax confidence class name
0 59.464005 5.452637 199.835617 171.995453 0.924212 0 person
1 166.318619 47.425896 275.354126 171.355835 0.819016 0 person
Saved 1 image to output33
Object counts: name
person 2
Name: count, dtype: int64
Results for thumbnail_3185.jpg:
xmin ymin xmax ymax confidence class name
0 68.541718 22.858665 100.686615 115.881477 0.650451 0 person
Saved 1 image to output34
Object counts: name
person 1
Name: count, dtype: int64
Results for thumbnail_3004.jpg:
xmin ymin xmax ymax confidence class name
0 16.467949 7.679947 185.351685 171.601318 0.788966 0 person
1 177.684647 13.900711 318.367249 171.006119 0.769397 0 person
Saved 1 image to output35
Object counts: name person 2 Name: count, dtype: int64 Results for thumbnail_2117.jpg: Empty DataFrame Columns: [xmin, ymin, xmax, ymax, confidence, class, name] Index: []
Saved 1 image to output36
Object counts: Series([], Name: count, dtype: int64)
Results for thumbnail_2194.jpg:
xmin ymin xmax ymax confidence class \
0 128.264832 17.223923 246.417358 157.115829 0.512254 0
1 221.005920 111.913651 320.000000 171.219070 0.356592 77
2 56.877853 40.796303 114.606705 88.694984 0.286311 50
name
0 person
1 teddy bear
2 broccoli
Saved 1 image to output37
Object counts: name person 1 teddy bear 1 broccoli 1 Name: count, dtype: int64 Results for thumbnail_2005.jpg: Empty DataFrame Columns: [xmin, ymin, xmax, ymax, confidence, class, name] Index: []
Saved 1 image to output38
Object counts: Series([], Name: count, dtype: int64)
Results for thumbnail_3695.jpg:
xmin ymin xmax ymax confidence class name
0 23.640350 66.985527 170.474426 166.495819 0.357245 16 dog
1 133.369095 29.806076 287.624939 123.378754 0.354583 0 person
Saved 1 image to output39
Object counts: name
dog 1
person 1
Name: count, dtype: int64
Results for thumbnail_3129.jpg:
xmin ymin xmax ymax confidence class name
0 251.405396 58.205311 320.000000 171.264816 0.925341 0 person
1 215.099197 129.501480 260.699585 172.702438 0.904707 0 person
Saved 1 image to output40
Object counts: name
person 2
Name: count, dtype: int64
Results for thumbnail_2384.jpg:
xmin ymin xmax ymax confidence class name
0 87.236252 6.909767 196.455521 171.686859 0.938032 0 person
Saved 1 image to output41
Object counts: name
person 1
Name: count, dtype: int64
Results for thumbnail_1120.jpg:
xmin ymin xmax ymax confidence class name
0 195.474915 33.009022 239.872314 151.447754 0.705667 0 person
1 171.965424 74.845947 215.062439 118.790344 0.590710 26 handbag
2 219.549438 91.001450 245.566772 128.410980 0.546561 28 suitcase
3 190.115112 28.154009 217.299866 87.963417 0.507633 0 person
4 219.012680 90.596786 245.095016 128.418671 0.276464 26 handbag
Saved 1 image to output42
Object counts: name
person 2
handbag 2
suitcase 1
Name: count, dtype: int64
Results for thumbnail_1984.jpg:
xmin ymin xmax ymax confidence class name
0 38.970001 8.946289 189.883606 170.122406 0.928110 0 person
1 207.236069 33.061363 319.765259 166.005798 0.692671 0 person
2 10.784012 140.824539 24.866302 157.100479 0.256825 41 cup
Saved 1 image to output43
Object counts: name person 2 cup 1 Name: count, dtype: int64 Results for thumbnail_0014.jpg: Empty DataFrame Columns: [xmin, ymin, xmax, ymax, confidence, class, name] Index: []
Saved 1 image to output44
Object counts: Series([], Name: count, dtype: int64)
Results for thumbnail_1089.jpg:
xmin ymin xmax ymax confidence class name
0 155.791061 7.510849 301.314453 171.474182 0.726388 0 person
1 63.012779 65.291687 154.914551 168.663681 0.533001 16 dog
Saved 1 image to output45
Object counts: name
person 1
dog 1
Name: count, dtype: int64
Results for thumbnail_0095.jpg:
xmin ymin xmax ymax confidence class name
0 70.442627 74.804337 126.911118 171.629593 0.252302 0 person
Saved 1 image to output46
Object counts: name
person 1
Name: count, dtype: int64
Results for thumbnail_3964.jpg:
xmin ymin xmax ymax confidence class name
0 1.730202 2.637009 113.904175 167.680542 0.851113 0 person
1 87.937302 19.548653 228.495743 171.325623 0.835442 0 person
2 221.666534 3.076134 319.228485 171.164429 0.785290 0 person
3 205.645477 0.476009 247.997864 33.175938 0.780001 74 clock
Saved 1 image to output47
Object counts: name
person 3
clock 1
Name: count, dtype: int64
Results for thumbnail_5120.jpg:
xmin ymin xmax ymax confidence class name
0 212.983566 94.568283 227.233231 124.106827 0.484411 0 person
1 187.584137 88.796165 198.898926 123.093910 0.363110 0 person
Saved 1 image to output48
Object counts: name
person 2
Name: count, dtype: int64
Results for thumbnail_0200.jpg:
xmin ymin xmax ymax confidence class name
0 139.897064 0.000000 289.053558 152.858612 0.802378 0 person
1 211.177933 120.213455 256.772552 172.690262 0.668188 41 cup
2 0.000000 82.700356 55.418816 163.130249 0.623903 15 cat
Saved 1 image to output49
Object counts: name person 1 cup 1 cat 1 Name: count, dtype: int64 Results for thumbnail_5513.jpg: Empty DataFrame Columns: [xmin, ymin, xmax, ymax, confidence, class, name] Index: []
Saved 1 image to output50
Object counts: Series([], Name: count, dtype: int64) Results for thumbnail_0529.jpg: Empty DataFrame Columns: [xmin, ymin, xmax, ymax, confidence, class, name] Index: []
Saved 1 image to output51
Object counts: Series([], Name: count, dtype: int64)
Results for thumbnail_2182.jpg:
xmin ymin xmax ymax confidence class name
0 117.981087 5.748840 177.537598 88.306969 0.607657 0 person
1 169.251160 63.801407 222.469238 169.500259 0.555439 0 person
Saved 1 image to output52
Object counts: name
person 2
Name: count, dtype: int64
Results for thumbnail_4626.jpg:
xmin ymin xmax ymax confidence class name
0 101.158691 19.309608 172.481598 168.649536 0.846888 0 person
1 160.050735 83.969330 170.573074 106.003815 0.288472 41 cup
Saved 1 image to output53
Object counts: name
person 1
cup 1
Name: count, dtype: int64
Results for thumbnail_3445.jpg:
xmin ymin xmax ymax confidence class \
0 193.976654 6.651924 318.543304 170.813904 0.923951 0
1 30.537571 46.784252 116.442978 169.860519 0.857799 0
2 129.496887 111.624481 167.422363 127.380661 0.560207 45
3 48.540405 116.708023 247.623566 171.830215 0.452929 60
4 15.778946 105.227631 38.750134 171.864441 0.295880 56
5 99.217392 108.180389 127.545090 122.639557 0.273594 45
name
0 person
1 person
2 bowl
3 dining table
4 chair
5 bowl
Saved 1 image to output54
Object counts: name
person 2
bowl 2
dining table 1
chair 1
Name: count, dtype: int64
Results for thumbnail_5247.jpg:
xmin ymin xmax ymax confidence class name
0 237.878387 29.059677 320.000000 169.417175 0.816215 0 person
1 10.979935 5.765930 239.388382 171.036133 0.598696 0 person
Saved 1 image to output55
Object counts: name
person 2
Name: count, dtype: int64
Results for thumbnail_1699.jpg:
xmin ymin xmax ymax confidence class name
0 190.774094 11.055222 320.000000 93.422134 0.409655 24 backpack
1 3.926575 19.543854 192.496094 166.544785 0.402526 0 person
2 289.293640 7.737494 319.914246 33.194191 0.304620 29 frisbee
Saved 1 image to output56
Object counts: name
backpack 1
person 1
frisbee 1
Name: count, dtype: int64
Results for thumbnail_3898.jpg:
xmin ymin xmax ymax confidence class name
0 114.886719 19.059753 149.560120 102.785683 0.804445 0 person
1 159.604385 16.683743 190.343216 88.616684 0.800794 0 person
2 62.854935 98.494354 93.992386 170.672516 0.728787 0 person
3 89.676476 63.079086 131.346680 150.976059 0.683238 0 person
4 0.339666 62.535774 52.461746 142.538086 0.367685 0 person
5 0.333313 65.679573 52.962120 141.874054 0.295822 20 elephant
Saved 1 image to output57
Object counts: name
person 5
elephant 1
Name: count, dtype: int64
Results for thumbnail_4839.jpg:
xmin ymin xmax ymax confidence class name
0 82.920807 16.141014 285.400238 169.616791 0.728031 0 person
Saved 1 image to output58
Object counts: name person 1 Name: count, dtype: int64 Results for thumbnail_4574.jpg: Empty DataFrame Columns: [xmin, ymin, xmax, ymax, confidence, class, name] Index: []
Saved 1 image to output59
Object counts: Series([], Name: count, dtype: int64)
Results for thumbnail_1377.jpg:
xmin ymin xmax ymax confidence class name
0 73.470001 6.139549 128.507660 169.341125 0.916058 0 person
1 44.041840 31.264500 77.113052 143.782593 0.856360 0 person
2 187.086700 16.840050 318.248352 166.675720 0.250390 13 bench
Saved 1 image to output60
Object counts: name
person 2
bench 1
Name: count, dtype: int64
Results for thumbnail_3862.jpg:
xmin ymin xmax ymax confidence class name
0 171.826752 75.508713 240.844025 170.424866 0.918988 0 person
1 276.243866 53.097939 319.812408 169.375824 0.867047 0 person
2 113.146454 51.449036 176.015503 171.291748 0.708953 0 person
3 20.138355 21.432114 122.580246 169.726852 0.670452 0 person
Saved 1 image to output61
Object counts: name
person 4
Name: count, dtype: int64
Results for thumbnail_0378.jpg:
xmin ymin xmax ymax confidence class name
0 194.087830 0.828224 318.320251 126.794373 0.641911 0 person
1 5.028244 0.846451 153.324051 160.507614 0.257506 0 person
Saved 1 image to output62
Object counts: name
person 2
Name: count, dtype: int64
Results for thumbnail_4545.jpg:
xmin ymin xmax ymax confidence class name
0 139.352173 27.057243 317.233337 169.692352 0.265721 62 tv
Saved 1 image to output63
Object counts: name
tv 1
Name: count, dtype: int64
Results for thumbnail_3979.jpg:
xmin ymin xmax ymax confidence class name
0 190.761841 41.980858 317.221466 170.238678 0.922442 0 person
1 59.576996 7.895622 197.433319 169.984100 0.821896 0 person
2 0.073750 67.026413 20.864586 143.365295 0.252041 0 person
Saved 1 image to output64
Object counts: name
person 3
Name: count, dtype: int64
Results for thumbnail_3282.jpg:
xmin ymin xmax ymax confidence class name
0 211.477722 4.655518 320.000000 171.839111 0.910799 0 person
1 0.000000 23.410858 146.875397 171.064514 0.879793 0 person
2 104.470978 10.857124 265.247894 171.711426 0.650472 0 person
3 17.181168 0.000000 130.876068 117.310829 0.408740 0 person
Saved 1 image to output65
Object counts: name
person 4
Name: count, dtype: int64
Results for thumbnail_4012.jpg:
xmin ymin xmax ymax confidence class name
0 211.324768 53.164761 319.998413 170.327530 0.895948 0 person
1 0.628002 4.830811 80.950760 171.659821 0.845913 0 person
2 53.243706 23.945839 230.730347 170.081024 0.835954 0 person
Saved 1 image to output66
Object counts: name
person 3
Name: count, dtype: int64
Results for thumbnail_1021.jpg:
xmin ymin xmax ymax confidence class name
0 149.861023 16.046036 291.207458 170.548782 0.921506 0 person
1 1.531708 1.513390 153.490204 171.160492 0.667511 0 person
Saved 1 image to output67
Object counts: name
person 2
Name: count, dtype: int64
Results for thumbnail_1598.jpg:
xmin ymin xmax ymax confidence class \
0 126.216827 35.160980 211.332458 169.978455 0.783211 0
1 292.956787 71.623627 319.963989 170.862137 0.396760 67
2 0.790085 6.666939 87.218994 170.112122 0.333143 1
3 165.729187 82.807693 176.400269 109.252792 0.254341 27
name
0 person
1 cell phone
2 bicycle
3 tie
Saved 1 image to output68
Object counts: name person 1 cell phone 1 bicycle 1 tie 1 Name: count, dtype: int64 Results for thumbnail_5484.jpg: Empty DataFrame Columns: [xmin, ymin, xmax, ymax, confidence, class, name] Index: []
Saved 1 image to output69
Object counts: Series([], Name: count, dtype: int64)
Results for thumbnail_2531.jpg:
xmin ymin xmax ymax confidence class name
0 170.470856 125.422737 194.173828 139.322113 0.653060 49 orange
1 0.626236 7.589973 176.629318 168.125641 0.546856 0 person
2 134.373108 103.405441 177.702301 133.711777 0.276823 47 apple
3 143.306168 130.781815 169.047806 139.649734 0.276766 47 apple
Saved 1 image to output70
Object counts: name
apple 2
orange 1
person 1
Name: count, dtype: int64
Results for thumbnail_3759.jpg:
xmin ymin xmax ymax confidence class name
0 185.313995 1.310852 319.36377 170.152939 0.526603 0 person
Saved 1 image to output71
Object counts: name
person 1
Name: count, dtype: int64
Results for thumbnail_5026.jpg:
xmin ymin xmax ymax confidence class name
0 0.142338 17.425537 67.875641 129.616791 0.938958 54 donut
1 143.749176 53.415813 283.605408 171.608902 0.937688 54 donut
2 64.103645 0.523243 170.689819 76.299057 0.934066 54 donut
3 5.105659 123.523270 149.794250 171.094986 0.886766 54 donut
4 256.606140 0.425198 319.771362 89.695084 0.861960 54 donut
5 289.287476 98.720673 319.910400 170.751129 0.814197 54 donut
6 0.674068 0.000000 89.452621 20.903061 0.691111 54 donut
7 159.298264 0.000000 262.439148 29.352699 0.572505 54 donut
Saved 1 image to output72
Object counts: name
donut 8
Name: count, dtype: int64
Results for thumbnail_0254.jpg:
xmin ymin xmax ymax confidence class \
0 164.427383 67.048492 273.870483 170.180237 0.913898 0
1 70.134346 19.490311 205.201080 170.812210 0.854274 0
2 5.394274 4.411087 66.836456 169.848083 0.328622 72
name
0 person
1 person
2 refrigerator
Saved 1 image to output73
Object counts: name
person 2
refrigerator 1
Name: count, dtype: int64
Results for thumbnail_4042.jpg:
xmin ymin xmax ymax confidence class name
0 118.218582 26.487823 275.852234 170.865799 0.909281 0 person
1 237.848770 98.459358 268.461914 125.023445 0.393558 0 person
Saved 1 image to output74
Object counts: name person 2 Name: count, dtype: int64 Results for thumbnail_0428.jpg: Empty DataFrame Columns: [xmin, ymin, xmax, ymax, confidence, class, name] Index: []
Saved 1 image to output75
Object counts: Series([], Name: count, dtype: int64) Results for thumbnail_5444.jpg: Empty DataFrame Columns: [xmin, ymin, xmax, ymax, confidence, class, name] Index: []
Saved 1 image to output76
Object counts: Series([], Name: count, dtype: int64)
Results for thumbnail_0769.jpg:
xmin ymin xmax ymax confidence class name
0 183.215271 5.020081 262.348328 171.989487 0.909600 0 person
1 80.604202 30.882828 156.781235 171.124176 0.868988 0 person
Saved 1 image to output77
Object counts: name
person 2
Name: count, dtype: int64
Results for thumbnail_3731.jpg:
xmin ymin xmax ymax confidence class name
0 41.450253 23.539391 140.269623 170.491852 0.852279 0 person
Saved 1 image to output78
Object counts: name person 1 Name: count, dtype: int64 Results for thumbnail_3078.jpg: Empty DataFrame Columns: [xmin, ymin, xmax, ymax, confidence, class, name] Index: []
Saved 1 image to output79
Object counts: Series([], Name: count, dtype: int64)
Results for thumbnail_1116.jpg:
xmin ymin xmax ymax confidence class \
0 229.355499 4.368484 310.134003 169.587250 0.918370 0
1 80.349701 66.505928 138.206833 171.436829 0.774849 0
2 105.281227 102.943939 166.011078 172.149261 0.382820 26
3 191.874115 63.627968 232.225433 136.065613 0.344178 58
4 81.738907 91.346222 134.283218 171.333679 0.258839 26
name
0 person
1 person
2 handbag
3 potted plant
4 handbag
Saved 1 image to output80
Object counts: name
person 2
handbag 2
potted plant 1
Name: count, dtype: int64
Results for thumbnail_2232.jpg:
xmin ymin xmax ymax confidence class name
0 143.155670 26.374390 201.231689 171.148529 0.826259 0 person
1 206.280121 44.986691 264.959686 169.473175 0.787986 0 person
2 166.232986 101.004951 206.110367 140.483139 0.414332 26 handbag
3 79.511681 45.549789 95.488655 79.918976 0.399651 0 person
4 0.874062 1.985268 145.176880 167.816589 0.277828 6 train
Saved 1 image to output81
Object counts: name
person 3
handbag 1
train 1
Name: count, dtype: int64
Results for thumbnail_0721.jpg:
xmin ymin xmax ymax confidence class \
0 101.000114 8.723999 308.137543 169.073715 0.681804 0
1 166.830383 110.730492 203.220947 142.293533 0.625566 65
2 206.456589 62.827759 234.059677 93.706116 0.412990 67
name
0 person
1 remote
2 cell phone
Saved 1 image to output82
Object counts: name person 1 remote 1 cell phone 1 Name: count, dtype: int64 Results for thumbnail_1710.jpg: Empty DataFrame Columns: [xmin, ymin, xmax, ymax, confidence, class, name] Index: []
Saved 1 image to output83
Object counts: Series([], Name: count, dtype: int64)
Results for thumbnail_0364.jpg:
xmin ymin xmax ymax confidence class name
0 36.987572 0.463600 233.869476 168.912323 0.495480 0 person
1 0.125710 42.961285 75.019386 169.528183 0.477114 0 person
Saved 1 image to output84
Object counts: name
person 2
Name: count, dtype: int64
Results for thumbnail_4016.jpg:
xmin ymin xmax ymax confidence class name
0 208.082336 54.720047 315.362915 170.697571 0.897045 0 person
1 59.007256 11.698898 217.114288 168.307190 0.863105 0 person
2 0.637535 4.217789 83.680183 170.430283 0.847649 0 person
Saved 1 image to output85
Object counts: name
person 3
Name: count, dtype: int64
Results for thumbnail_3806.jpg:
xmin ymin xmax ymax confidence class \
0 188.709305 28.699776 271.926575 169.180725 0.890608 0
1 80.782745 119.574829 99.778595 147.630920 0.572172 58
2 223.176270 61.173210 231.308838 72.060478 0.298346 67
3 103.744308 136.159195 129.783234 167.233261 0.297070 58
4 107.466209 68.414505 122.635109 101.645798 0.276891 58
5 62.756264 129.628647 81.561714 169.486740 0.270850 58
6 93.042259 87.998512 108.758293 114.643257 0.265385 58
7 78.901833 53.107529 106.170174 91.836105 0.263454 58
name
0 person
1 potted plant
2 cell phone
3 potted plant
4 potted plant
5 potted plant
6 potted plant
7 potted plant
Saved 1 image to output86
Object counts: name
potted plant 6
person 1
cell phone 1
Name: count, dtype: int64
Results for thumbnail_1445.jpg:
xmin ymin xmax ymax confidence class name
0 122.655396 3.49102 279.600311 169.836914 0.947074 0 person
Saved 1 image to output87
Object counts: name
person 1
Name: count, dtype: int64
Results for thumbnail_4095.jpg:
xmin ymin xmax ymax confidence class name
0 221.555740 71.861191 264.665710 171.132767 0.911868 0 person
1 156.829025 72.107063 204.332443 169.934998 0.872741 0 person
2 61.355408 81.234375 78.703644 133.764038 0.672436 0 person
Saved 1 image to output88
Object counts: name
person 3
Name: count, dtype: int64
Results for thumbnail_0875.jpg:
xmin ymin xmax ymax confidence class \
0 96.757240 9.862465 233.891357 171.548645 0.916444 0
1 73.371826 2.207262 109.689484 33.727257 0.605982 58
2 0.000000 6.410263 73.038467 141.025604 0.471249 56
3 39.984402 21.511345 132.989624 153.620514 0.379645 56
4 0.915382 71.377609 76.028465 154.904160 0.275209 60
5 6.703049 70.831055 78.062180 154.202209 0.252643 56
name
0 person
1 potted plant
2 chair
3 chair
4 dining table
5 chair
Saved 1 image to output89
Object counts: name
chair 3
person 1
potted plant 1
dining table 1
Name: count, dtype: int64
Results for thumbnail_0247.jpg:
xmin ymin xmax ymax confidence class name
0 112.381332 22.742783 217.997726 172.18782 0.887781 0 person
Saved 1 image to output90
Object counts: name
person 1
Name: count, dtype: int64
Results for thumbnail_4053.jpg:
xmin ymin xmax ymax confidence class name
0 70.264435 5.718681 184.477112 169.637390 0.899587 0 person
1 165.884216 55.873562 278.725311 170.362183 0.787739 0 person
Saved 1 image to output91
Object counts: name
person 2
Name: count, dtype: int64
Results for thumbnail_5119.jpg:
xmin ymin xmax ymax confidence class name
0 210.857208 94.237427 220.066742 124.031525 0.61233 0 person
Saved 1 image to output92
Object counts: name
person 1
Name: count, dtype: int64
Results for thumbnail_3992.jpg:
xmin ymin xmax ymax confidence class name
0 97.227501 4.233742 279.014252 171.454285 0.58972 0 person
Saved 1 image to output93
Object counts: name
person 1
Name: count, dtype: int64
Results for thumbnail_3569.jpg:
xmin ymin xmax ymax confidence class name
0 173.987579 35.839203 312.622864 172.396912 0.911581 0 person
1 0.000000 1.292839 154.701416 171.674835 0.905045 0 person
Saved 1 image to output94
Object counts: name
person 2
Name: count, dtype: int64
Results for thumbnail_5238.jpg:
xmin ymin xmax ymax confidence class name
0 203.328339 17.522545 289.887665 171.215302 0.931066 0 person
Saved 1 image to output95
Object counts: name
person 1
Name: count, dtype: int64
Results for thumbnail_3768.jpg:
xmin ymin xmax ymax confidence class name
0 178.414078 61.598492 271.140656 170.68692 0.320065 14 bird
Saved 1 image to output96
Object counts: name
bird 1
Name: count, dtype: int64
Results for thumbnail_2453.jpg:
xmin ymin xmax ymax confidence class name
0 89.601028 2.355194 273.311035 168.651642 0.953322 0 person
1 158.248856 104.262451 197.786118 171.557343 0.907252 27 tie
Saved 1 image to output97
Object counts: name person 1 tie 1 Name: count, dtype: int64 Results for thumbnail_4515.jpg: Empty DataFrame Columns: [xmin, ymin, xmax, ymax, confidence, class, name] Index: []
Saved 1 image to output98
Object counts: Series([], Name: count, dtype: int64)
Results for thumbnail_4629.jpg:
xmin ymin xmax ymax confidence class name
0 105.686798 18.330505 197.794342 170.699188 0.844963 0 person
Saved 1 image to output99
Object counts: name
person 1
Name: count, dtype: int64
Results for thumbnail_4249.jpg:
xmin ymin xmax ymax confidence class name
0 119.076599 21.926826 320.0 171.252808 0.487322 0 person
Saved 1 image to output100
Object counts: name person 1 Name: count, dtype: int64
# Print the aggregate tally accumulated across all processed thumbnails,
# one "<key>: <count>" line per entry, in the mapping's own iteration order.
# NOTE(review): the recorded output for this cell shows numeric keys
# (e.g. "1: 85"), which suggests total_counts was updated with per-image
# count values rather than object names -- confirm where it is populated.
print("Total counts of detected objects:")
for object_type in total_counts:
    count = total_counts[object_type]
    print(f"{object_type}: {count}")
Total counts of detected objects: 4: 3 2: 31 1: 85 3: 11 5: 2 8: 1 6: 1
SUMMARY¶
Film Description¶
Honey, I Shrunk the Kids¶
1989 PG 1h 33m
From the IMDB website:
"The scientist father of a teenage girl and boy accidentally shrinks his and two other neighborhood teens to the size of insects. Now the teens must fight diminutive dangers as the father searches for them."
IMDB website. (n.d.). imdb.com. Retrieved April 27, 2024, from https://www.imdb.com/title/tt0097523/
In 'Honey, I Shrunk the Kids,' an eccentric inventor, Wayne Szalinski, accidentally shrinks his and his neighbor's children with his experimental shrink ray. The miniature kids must navigate a perilous journey across their now-gigantic backyard, encountering obstacles like insects and sprinklers, as they try to return home.
The film is notable for its creative visual effects that magnify ordinary environments into epic landscapes. It's a blend of adventure, humor, and family dynamics, ultimately showcasing the children's resourcefulness and the parents' determination to rescue their kids. The movie was a commercial success and spawned a franchise including sequels and a television series.
Methods Summary¶
This section should highlight methods you used in your exploratory analysis. You should include at least one clustering technique or develop another way to relate frames to other frames. You should also consider dimensionality reduction.
Each thumbnail is one frame of the Movie. Each thumbnail has been analyzed and embedded via CLIP. Referenced Model: https://replicate.com/andreasjansson/clip-features/examples .
For hypothesis 1 we used the following methods¶
- Explore CLIP through a natural-language query for objects and investigate the surrounding frames.
- Look at the Euclidean distance of similar frames.
- Explore similar frames for insights.
Hunches and Hypotheses¶
This section should summarize the questions that you asked about the film that could potentially be answered by exploratory analysis. You should ask at least three questions.
Hypothesis 1
- Use CLIP to find an object, observe ~5 frames before and after it, and interpret the results.
- A hypothesis we discussed was that certain characters, or objects in the movie would be used for dramatic or comedic effect. We are using CLIP to identify these objects and then explore the surrounding frames to discover any evidence to support or refute this hypothesis.
Hypothesis 2
- Scene Consistency and Transition - Frames that are visually and thematically similar cluster together tightly in t-SNE and PCA visualizations, and distinct clusters correspond to different scenes or settings in the movie.
- Rationale: This hypothesis tests the ability of CLIP embeddings, which capture both visual and semantic content, to differentiate between distinct scenes based on their visual content and thematic elements.
Hypothesis 3
- Look for a model that can analyze an image to identify the type of objects in the image
- Use counter to count the number of each object in the image
Results and Interpretation¶
Hypothesis 1:
- Begin by looking for the top images of a dog, lawnmower and an ant returned from CLIP queries.
- Explore the frames surrounding the images identified by the above queries.
These are the top matches returned by CLIP.
- 'a photo of a white and brown dog' = thumbnail_5039.jpg
- 'a photo of a lawnmower' = thumbnail_4734.jpg
- 'a photo of an ant' = thumbnail_3231.jpg
The indexes we are interested in are 5038, 4733 and 3230.
- For the dog image:
In this sequence of frames, we see the children running, the parents talking, the dog looking up at the table, and the kids waving their arms for attention; the sequence ends with the father narrowly avoiding accidentally eating his shrunken son. The dog being in the center anchors the dramatic tension of the scene. He almost looks like he is trying to warn the father or somehow knows more than the humans. Sentient animals with knowledge or wisdom superior to that of humans have been a trope used in several other films that depict animals (especially in comedies) as central figures.
- For the lawnmower image:
In this scene a neighbor kid is remotely controlling the lawnmower while the children are in the grass. This is a clear action scene with a threat eliciting dramatic flight.
There don't seem to be many similar frames in the movie for the lawnmower. This is probably because it is a singular threat event used for dramatic tension in a single scene. It does look like it may appear around 790, so let's see if that is foreshadowing.
In the scene around index 790 we see the son showing the remote controlled lawnmower to the boy who appears later controlling the mower in the flight scene. So this is an example of foreshadowing in a film that we identified by using the data to point us to something interesting!
- For the ant image:
In this scene, we see a boy eating some white substance, followed by a girl who appears scared or surprised. The next frame shows the cause, which is an ant menacing the kids, which is noticed by the boy as well, and the kids seem to begin fleeing the threat.
The ant seems to be a later threat in the film, possibly also foreshadowed around 2180. However it appears to show up for an extended time around 4400.
2180 looks like it matches the ant scene because of the setting. They both have extensive vegetation in the scene, which probably accounts for the lower distance than the later match around 4400.
This identified the same scene that we discovered through exploration.
- We began with a hypothesis that the dog would be comedic relief and the ant and lawnmower would be for dramatic effect or threat of danger. We used CLIP to identify the initial frames of interest, then proceeded to analyze the target frames using the Euclidean distance, performing more exploration of similar frames and then circling back to a more specific CLIP question. The original hypothesis that these objects would be used for comedic relief or dramatic effect seems to have some justification, as the lawnmower and ant appear initially as dramatic threats, but from this discovery, it looks like the ant actually fought a scorpion and saved the kids, which indicates it was also used to elicit a sympathetic response from the audience.
Hypothesis 2:
- After removing much of the noise from opening and closing credits frames and applying t-SNE and K-means we examined samples from identified clusters and we can observe that frames correspond to specific scenes or types of scenes (indoor vs outdoor, calm vs action-packed). Embeddings effectively capture scene-specific features and can be used to segment the movie based on visual content. Also, clusters seem to reflect the proper timeframe and transition of the movie. t-SNE seemed to work better than PCA at handling and splitting clusters with visual correlation.
Hypothesis 3:
- We used the YOLO model, an open-source model we got from the internet, to identify objects in an image. We initially analyzed all of the images in the thumbnail folder, but found that this was not practical because the code was slow, so we decided to analyze only the first 100 images. The detected results are stored in a DataFrame df, which is printed out. The DataFrame contains columns such as xmin, xmax, ymin, ymax, confidence, and name of the detected object. The detected objects are overlaid on the image with bounding boxes and the image is displayed using matplotlib. After all images are processed, the total counts of detected objects across all images are printed. This provides a summary of how many of each type of object were detected in the analyzed images.
Reflection¶
Reflect on your process of analysis. What worked well and did not work well? Describe the limitations of the work and describe what you would work on with more time.
We initially began exploring CLIP to discover the capabilities it provided. We were impressed with the model's ability to return results from simple English questions, so we brainstormed ideas for exploring it alongside other tools, both to compare and contrast CLIP with those tools and to discover what it could tell us about the film itself.
We came up with the hypothesis that we could identify some objects in the film and explore whether they provided an assumed purpose in the movie. We did achieve (subjectively) a confirmation that the objects were used in a way that coincided with our hypothesis. What was interesting is the way that our initial exploration led to a secondary use that effectively used CLIP to identify a further area of research. This resulted in the surprising find that the ant seems to not only be used as a threat but also as an ally to evoke both fear and empathy.